**************************************************
*This file cleans graduation rates from the IPEDS*
**************************************************

*** create dta files from csv, labeling
* For each IPEDS graduation-rates release year this loop:
*   1. runs the matching gr<year>.do file, which is expected to load the raw data,
*   2. derives datasetYear and cohortYear,
*   3. keeps chrtstat 12-13 and sections 1 and 4,
*   4. renames the year-specific count variables to one common set of names,
*   5. reshapes to one row per unitid, suffixing each count with a cohort tag,
*   6. saves the result as Temporary5_<year>.dta in $clean_data_education.
* Requires the globals $raw_data_education, $do_clean_education and
* $clean_data_education to be defined by the caller.
cd "$raw_data_education/graduationRates"

* Common variable names; every raw list below is written in this same order.
local cleanNames blackMen blackWomen black hispanicMen hispanicWomen hispanic whiteMen whiteWomen white men women total

foreach i of num 1997/2007 2009/2014 2016/2019 {
	clear
	* capture is kept so that a non-fatal error inside the labelling do-file
	* does not stop the run; presumably that is why it was added - confirm.
	cap do "$do_clean_education/IPEDS - do/do graduationRates/gr`i'.do"
	local loadRc = _rc
	* Fail loudly when nothing was loaded instead of stopping further down
	* with an unrelated variable-not-found error.
	if _N == 0 {
		display as error "gr`i'.do loaded no observations (capture return code `loadRc')"
		error 2000
	}

	gen datasetYear = `i'
	* Cohort codes up to 3 are dated six years before the release,
	* codes 4 and 5 three years before it.
	gen cohortYear = `i' - 6 if cohort <= 3
	replace cohortYear = `i' - 3 if cohort == 4 | cohort == 5

	keep if inrange(chrtstat, 12, 13)
	keep if inlist(section, 1, 4)

	* Raw variable names change across releases; pick the list for this year.
	if `i' <= 2007 {
		local rawNames grrace03 grrace04 grrace18 grrace09 grrace10 grrace21 grrace11 grrace12 grrace22 grrace15 grrace16 grrace24
	}
	else if `i' <= 2010 {
		local rawNames dvgrbkm dvgrbkw dvgrbkt dvgrhsm dvgrhsw dvgrhst dvgrwhm dvgrwhw dvgrwht grtotlm grtotlw grtotlt
	}
	else {
		local rawNames grbkaam grbkaaw grbkaat grhispm grhispw grhispt grwhitm grwhitw grwhitt grtotlm grtotlw grtotlt
	}
	keep unitid cohort* datasetYear chrtstat section `rawNames'
	rename (`rawNames') (`cleanNames')

	* Suffix used by reshape: chrtstat 12 gives adjCoh, 13 gives comp150;
	* cohort 1 gives 4y, 4 gives 2y, 5 gives L2y.
	* NOTE(review): rows with any other cohort code keep an empty suffix - confirm
	* that none survive the section filter above.
	gen grad = "_adjCoh_4y" if chrtstat == 12 & cohort == 1
	replace grad = "_adjCoh_2y" if chrtstat == 12 & cohort == 4
	replace grad = "_adjCoh_L2y" if chrtstat == 12 & cohort == 5
	replace grad = "_comp150_4y" if chrtstat == 13 & cohort == 1
	replace grad = "_comp150_2y" if chrtstat == 13 & cohort == 4
	replace grad = "_comp150_L2y" if chrtstat == 13 & cohort == 5
	drop chrtstat section cohort

	reshape wide blackMen blackWomen hispanicMen hispanicWomen whiteMen whiteWomen men women black hispanic white total , i(unitid) j(grad) string

	save "$clean_data_education/Temporary5_`i'.dta", replace
}

*** append
* Stack the per-year Temporary5 files into one dataset, deleting each temporary
* file once it has been read, then keep only the cohort-year windows of
* interest and save the result as graduationRates.dta.
use "$clean_data_education/Temporary5_1997.dta", clear
foreach yr of numlist 1998/2007 2009/2014 2016/2019 {
	local tmpFile "$clean_data_education/Temporary5_`yr'.dta"
	append using "`tmpFile'"
	erase "`tmpFile'"
}
erase "$clean_data_education/Temporary5_1997.dta"

order unitid datasetYear cohortYear
* inrange is false for missing values, so rows without a cohort year are dropped.
keep if inrange(cohortYear, 1992, 1995) ///
	| inrange(cohortYear, 1999, 2001) ///
	| inrange(cohortYear, 2006, 2008) ///
	| inrange(cohortYear, 2013, 2015)
save "$clean_data_education/graduationRates.dta", replace

*************************
* Second source: the graduationRatesL2 files.
* For each release year the matching gr<year>_l2.do file is run, only unitid,
* line_50 and line_11 are kept, and the two counts are renamed to
* total_adjCoh_L2y and total_comp150_L2y. cohortYear is the release year
* minus three. Each year is saved as Temporary6_<year>.dta.
clear
cd "$raw_data_education/graduationRates/csv graduationRatesL2"

foreach i of num 1999/2001 2004/2019 {
	* capture is kept so that a non-fatal error inside the labelling do-file
	* does not stop the run; presumably that is why it was added - confirm.
	cap do "$do_clean_education/IPEDS - do/do graduationRates/do graduationRatesL2/gr`i'_l2.do"
	local loadRc = _rc
	* Fail loudly when nothing was loaded instead of stopping at the keep
	* below with an unrelated variable-not-found error.
	if _N == 0 {
		display as error "gr`i'_l2.do loaded no observations (capture return code `loadRc')"
		error 2000
	}

	keep unitid line_50 line_11
	gen datasetYear = `i'
	gen cohortYear =  `i' - 3
	rename (line_50 line_11) (total_adjCoh_L2y total_comp150_L2y)
	label variable total_adjCoh_L2y "_adjCoh_L2y total"
	label variable total_comp150_L2y "_comp150_L2y total"
	save "$clean_data_education/Temporary6_`i'.dta", replace
	* Start the next iteration from an empty dataset.
	clear
}

*** append
* Stack the per-year Temporary6 files, deleting each one after it is read,
* keep the same cohort-year windows as the main graduation-rates file and
* save the result as graduationRatesL2.dta.
use "$clean_data_education/Temporary6_1999.dta", clear
foreach yr of numlist 2000/2001 2004/2019 {
	local tmpFile "$clean_data_education/Temporary6_`yr'.dta"
	append using "`tmpFile'"
	erase "`tmpFile'"
}

order unitid datasetYear cohortYear
* inrange is false for missing values, so rows without a cohort year are dropped.
keep if inrange(cohortYear, 1992, 1995) ///
	| inrange(cohortYear, 1999, 2001) ///
	| inrange(cohortYear, 2006, 2008) ///
	| inrange(cohortYear, 2013, 2015)
save "$clean_data_education/graduationRatesL2.dta", replace
* The first temporary file is only removed once the combined file is saved.
erase "$clean_data_education/Temporary6_1999.dta"

** build a unique file
* Stack the main graduation-rates file and the L2 file into one dataset.
* The L2 rows only carry unitid, datasetYear, cohortYear and the two
* total_*_L2y variables, so every other variable is missing on those rows.
use "$clean_data_education/graduationRates.dta", clear
append using "$clean_data_education/graduationRatesL2.dta"

* final cleanings
* A temporary copy of cohortYear named year is created before the shared
* institution-id do-file runs and is dropped right after it.
* NOTE(review): presumably that do-file expects a variable called year - confirm.
gen year = cohortYear

do "$do_clean_education/clean institutions - id.do"
	drop year 

*** Build three year averages
* year group
* Map each cohort year to the label of its averaging window; rows that fall
* in no window keep yearGroup 0 and are dropped.
* NOTE(review): the earlier keep filters retain cohortYear 1995, but no window
* here covers it, so those rows are dropped - confirm this is intended.
gen yearGroup = 0
	replace yearGroup = 1990 if cohortYear <= 1991
	replace yearGroup = 1994 if inrange(cohortYear, 1992, 1994)
	replace yearGroup = 2000 if inrange(cohortYear, 1999, 2001)
	replace yearGroup = 2007 if inrange(cohortYear, 2006, 2008)
	* Stata treats missing as larger than any number, so the open-ended test
	* must exclude missing explicitly; otherwise rows with an unknown cohort
	* year would be pooled into the 2014 group.
	replace yearGroup = 2014 if cohortYear >= 2013 & !missing(cohortYear)
	drop if yearGroup == 0

**********************************************************
* Run it all together, save labels and collapse
* collapse discards variable labels, so each label is stored in a local
* macro named l<varname> first and re-applied afterwards. Variables without
* a label get their own name as label.
foreach vn of varlist _all {
	local l`vn' : variable label `vn'
	if `"`l`vn''"' == "" local l`vn' "`vn'"
}

* One row per institution and year group: mean of every count variable and
* the last datasetYear in the group.
* NOTE(review): the variable range relies on blackMen_adjCoh_2y being the
* first and total_comp150_L2y the last count variable in the dataset order.
* NOTE(review): last depends on the row order within each group - confirm
* that this is the intended datasetYear.
collapse (last) datasetYear (mean) blackMen_adjCoh_2y-total_comp150_L2y, by(unitid yearGroup)

foreach vn of varlist _all {
	label variable `vn' "`l`vn''"
}
**********************************************************

save "$clean_data_education/graduationRatesReshaped.dta", replace
